package com.jinghang.nginxProject.APP

import com.jinghang.nginxProject.utils.StringUtil
import org.apache.spark.sql.SparkSession

/**
 * Spark job that maps every line of an nginx access log through
 * `StringUtil.getUserAgent`, prints the first 10 results and writes the
 * complete result set as text files.
 *
 * Optional command-line arguments (both fall back to the original hard-coded
 * paths, so running with no arguments behaves as before):
 *   - `args(0)`: path of the access log to read
 *   - `args(1)`: directory the extracted values are written to
 */
object UserAgentFormat {

  /** Input used when no path is supplied on the command line. */
  private val DefaultInputPath = "data/10000_access.log"

  /** Output directory used when none is supplied on the command line. */
  private val DefaultOutputPath = "data/output/userAgent"

  /**
   * Entry point.
   *
   * @param args optional `[inputPath, outputPath]`; missing entries use the defaults
   */
  def main(args: Array[String]): Unit = {
    val inputPath = args.lift(0).getOrElse(DefaultInputPath)
    val outputPath = args.lift(1).getOrElse(DefaultOutputPath)

    val session = SparkSession
      .builder()
      .appName("UserAgentFormat")
      .master("local[2]")
      .getOrCreate()

    // Stop the session even when the job fails (for example when the output
    // directory already exists and saveAsTextFile throws); otherwise the
    // local Spark context is left running.
    try {
      val accessLogLines = session.sparkContext.textFile(inputPath)

      // NOTE(review): an earlier inline version took the text between the 7th
      // and 8th double quote of each line; getUserAgent presumably does the
      // equivalent -- confirm in StringUtil.
      val userAgents = accessLogLines.map(line => StringUtil.getUserAgent(line))

      // Small sample on stdout as a quick sanity check before the full write.
      userAgents.take(10).foreach(println)
      userAgents.saveAsTextFile(outputPath)
    } finally {
      session.stop()
    }
  }

}
